**************************************************************************************************
*** This do file creates the replication datasets for 											*/
*** TITLE																					 	*/
*** AUTHOR																						*/
*** AUTHOR																						*/
*** AUTHOR																						*/
*** Published in: ***																			*/
**************************************************************************************************


/* File description - "dataset_preparation_CAWI.do":
- indicates the link to download all the original datasets needed
- INPUT: the original CAWI panel dataset, read from the folder stored in the global "original" (set below to "data/external/")
- OUTPUT: "dataset_idlevel_CAWI.dta" (recoded CAWI data) and "merged_dataset_ID.dta" (CAWI + CATI appended), both saved in the folder stored in the global "recoded" (set below to "data/processed/")
- it cleans and recodes the dataset, and keeps only the variables of interest
*/


**************************************************************************************************


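/* Before proceeding:
1) Follow all instructions in "dataset_preparation_CATI.do" and run it: its output "dataset_idlevel_CATI.dta" is appended at the end of this file
2) Put the original CAWI dataset into the "external" sub-folder of the "data" folder. The code below reads it under the name "ITANES_PANEL_2011_13_CAWI.dta"; if your copy is named "SWG-PANEL-ALL5WAVES_CAWI.dta", rename it or edit the -use- line accordingly
*/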

**************************************************************************************************
* --- Session set-up and data load ---
* Disable the -more- pause and start from an empty dataset.
set more off
clear

* Project root. This is an absolute, machine-specific path: edit it before
* running the file on another computer.
global current "C:/Users/andre/Dropbox/Ricerca/PAPER/2016 Text Analysis Italian TV/03_Analysis/"
cd "${current}"

* Output folder for the recoded data and input folder with the original data.
global recoded "${current}data/processed/"
global original "${current}data/external/"

* Load the original CAWI panel file.
use "${original}ITANES_PANEL_2011_13_CAWI.dta", clear

* Respondent identifier: running row number after sorting on ID_UTENTE.
* NOTE(review): presumably ID_UTENTE is unique per respondent; if it is not,
* the order within ties (and hence id) is not guaranteed to be reproducible.
sort ID_UTENTE
gen id = _n
* Numbering in CATI dataset follows
count

* Interview date of each wave, converted from a string variable with date().
* Waves 1-2 are parsed with the mask "YMDhms", waves 3-5 with "DMY".
cap drop interview_date*
local date_src  A126 B054 C042 DATA DATA_1
local date_mask YMDhms YMDhms DMY DMY DMY
forvalues w = 1/5 {
	local src  : word `w' of `date_src'
	local mask : word `w' of `date_mask'
	gen interview_date_w`w' = date(`src', "`mask'")
}
format interview_date_* %td

* --- Socio-demographic variables ---

* Geographic area: A002 codes 1-5 are shifted to 0-4 and labelled.
recode A002 (1 = 0 "NW") (2 = 1 "NE") (3 = 2 "C") (4 = 3 "S") (5 = 4 "Islands"), gen(area)

* Sex: A004 shifted down by one.
gen sex = A004 - 1
label var sex "[0 men; 1 women]"

* Age: copy of A005, converted to numeric by destring if it is stored as text.
gen age = A005
destring age, replace

* Education: the 11 codes of A007 collapsed into four ordered levels.
gen educ = A007
recode educ (1 2 3 = 0) (4 5 6 = 1) (7 8 = 2) (9 10 11 = 3)
label define educ ///
	0 "Lower education" ///
	1 "Professional diploma" ///
	2 "High School degre (Liceum)" ///
	3 "University degree or +"
label values educ educ

* Church attendance: A104 codes 1-5 shifted to 0-4, codes 6 and 7 set to missing.
gen churchat = A104
recode churchat (1 = 0) (2 = 1) (3 = 2) (4 = 3) (5 = 4) (6 7 = .)
label define churchat ///
	0 "Never" ///
	1 "2-3/year" ///
	2 "1/month" ///
	3 "2-3/month" ///
	4 "1/week"
label values churchat churchat
label var churchat "Frequency of attendance to religious ceremonies"

* Occupation, built from A115, A116 and A118.
* Everybody starts in category 0; the assignments below are applied in order,
* so a later rule overrides an earlier one when both match the same respondent
* (in particular the A118-based rules 8-11 override the A116-based rules 1-7).
* NOTE(review): A115 appears to separate public (1) from private (2) positions,
* judging from the value labels below - confirm against the codebook.
gen occup = 0
replace occup = 1  if inlist(A116, 2, 3) & A115 == 1
replace occup = 2  if inlist(A116, 2, 3, 13) & A115 == 2
replace occup = 3  if inlist(A116, 4, 5, 6, 24) & A115 == 1
replace occup = 4  if inlist(A116, 4, 5, 6, 24) & A115 == 2
replace occup = 5  if A116 == 7
replace occup = 6  if inlist(A116, 11, 12, 14, 15, 16, 17)
replace occup = 7  if inlist(A116, 20, 21, 22)
replace occup = 8  if inlist(A118, 3, 4)
replace occup = 9  if A118 == 1
replace occup = 10 if inlist(A118, 10, 11, 12)
replace occup = 11 if A118 == 2
label define occup ///
	0 "Missing/unemployed" ///
	1 "Public manager" ///
	2 "Private manager/Professional" ///
	3 "Public employee/teacher/other public worker" ///
	4 "Private employee/other provate worker" ///
	5 "Manual worker" ///
	6 "Small autonomous worker" ///
	7 "Occasioanl worker" ///
	8 "Student" ///
	9 "Retired" ///
	10 "Other not working/NA/wealthy/not able to work" ///
	11 "Houseperson"
label values occup occup

*** Sources of political info: primary (polinfo, from D035) and secondary (polinfo2, from D036)
* Both items get the same recode: codes 2 4 5 6 8 10 are pooled into 0,
* code 1 is left as it is, code 3 becomes 2 and code 7 becomes 3.
* NOTE(review): code 9 is not touched by the recode and has no value label -
* confirm that it does not occur in D035/D036.
local info_src D035 D036
local info_new polinfo polinfo2
forvalues i = 1/2 {
	local src : word `i' of `info_src'
	local new : word `i' of `info_new'
	gen `new' = `src'
	recode `new' (10 8 6 5 4 2 = 0) (3 = 2) (7 = 3)
	label define `new' 0 "NA/nessuna queste/cont pers/periodici/radio/amici" 1 "TV" 2 "Newspaper" 3 "Internet"
	label values `new' `new'
}
label var polinfo "Principale fonte informazione?"
label var polinfo2 "Seconda fonte informazione?"


*** Favourite TV news program - Only for waves 1-3 ***
* Source items: A087 (wave 1), B039 (wave 2), C033 (wave 3).
* Codes 1-7 are kept as they are, codes 8 9 10 12 are pooled into 8 and every
* other value (including code 11 and missing) stays missing.
* NOTE(review): code 11 is deliberately not listed in the original recode -
* confirm against the codebook that it should be missing.

label define tg 1 "Rai 1" 2 "Rai 2" 3 "Rai 3" 4 "Rete 4" 5 "Canale 5" 6 "Italia 1" 7 "La 7" 8 "Other / DK"

local w = 0
foreach src in A087 B039 C033 {
	local ++w
	gen tg_w`w' = .
	forvalues k = 1/7 {
		replace tg_w`w' = `k' if `src' == `k'
	}
	replace tg_w`w' = 8 if inlist(`src', 8, 9, 10, 12)
	* Attach the value label defined above. The label is called tg; attaching
	* a label named tv, which this file never defines, would leave the
	* variables without any value labels.
	label values tg_w`w' tg
}

*** Frequency watching TV news
* Source items: A086 (wave 1), B038 (wave 2), C032 (wave 3).
* Each item is shifted down by one; a shifted value of 10 (source code 11)
* is then set to missing.
local w = 0
foreach src in A086 B038 C032 {
	local ++w
	gen tg_freq_w`w' = `src' - 1
	recode tg_freq_w`w' (10 = .)
}



* Propensities to Vote [0-10] for PD, PDL and LN in each of the five waves
cap drop ptv*

* Waves 1-4: the party items share the suffixes _04 (PD), _06 (PDL), _09 (LN).
local ptv_stub A081 B033 C031 D033
forvalues w = 1/4 {
	local stub : word `w' of `ptv_stub'
	gen ptv_pd_w`w' = `stub'_04
	gen ptv_pdl_w`w' = `stub'_06
	gen ptv_ln_w`w' = `stub'_09
}

* Wave 5: the item suffixes differ (_03, _04, _07).
gen ptv_pd_w5 = E027_03
gen ptv_pdl_w5 = E027_04
gen ptv_ln_w5 = E027_07

* Source codes 12 and 13 become missing; the remaining codes are shifted down
* by one so that 1-11 becomes 0-10.
foreach v of varlist ptv_* {
	recode `v' (12 13 = .)
	replace `v' = `v' - 1
}

*** Vote for PDL ***
* Dummy per wave (waves 1-3) from the vote items A105, B042 and C035:
* 1 if the answer is code 6 (PDL vote intention), missing if it is 88 (DN)
* or 99 (NA), 0 otherwise - including when the source item is missing.
* NOTE(review): the government-coalition block of this file treats codes 15
* and 16 of the same items as DN/NA, whereas 88/99 are used here. Check the
* codebook: only one of the two sets of codes can be right.
local w = 0
foreach src in A105 B042 C035 {
	local ++w
	cap drop vote_pdl_w`w'
	gen vote_pdl_w`w' = 0
	replace vote_pdl_w`w' = . if inlist(`src', 88, 99)
	replace vote_pdl_w`w' = 1 if `src' == 6
}

*** Vote for government coalition ***
* Dummy per wave (waves 1-3) from the vote items A105, B042 and C035:
* 1 if the answer is code 6 (PDL) or code 9 (LN), missing if it is 15 (DN)
* or 16 (NA), 0 otherwise - including when the source item is missing.
* NOTE(review): the PDL-vote block of this file treats codes 88 and 99 of the
* same items as DN/NA, whereas 15/16 are used here. Check the codebook: only
* one of the two sets of codes can be right.
local w = 0
foreach src in A105 B042 C035 {
	local ++w
	cap drop vote_gov_w`w'
	gen vote_gov_w`w' = 0
	replace vote_gov_w`w' = . if inlist(`src', 15, 16)
	replace vote_gov_w`w' = 1 if inlist(`src', 6, 9)
}


*** Trust in judicial system (magistrati) ***
cap drop trust*

* Wave 1 pools the two split-half samples: A039_04 (1-4 scale) is stretched
* onto the codes 1 3 7 11, codes 5 and 6 become missing, and respondents who
* are still missing take their value from A040_04 (0-10 scale).
* After the common shift below the four-point answers sit at 0, 2, 6 and 10.
* NOTE(review): the government-approval variable of wave 1 maps its four-point
* item to 10 7 3 0, i.e. with different inner points and in reverse order -
* confirm that 0 2 6 10 is the intended mapping here.
gen trust_mag_w1 = A039_04
recode trust_mag_w1 (1 = 1) (2 = 3) (3 = 7) (4 = 11) (5 6 = .)
replace trust_mag_w1 = A040_04 if trust_mag_w1 == .

* Waves 2 and 3: plain copies of the source items.
gen trust_mag_w2 = B010_04
gen trust_mag_w3 = C011_04

* Codes 12 and 13 become missing, then everything is shifted down by one.
foreach v of varlist trust_* {
	recode `v' (12 13 = .)
	replace `v' = `v' - 1
}


*** Best at handling most important problem [PDL government is best]
* One dummy per wave (waves 1-3) from A042, B013 and C014:
* code 1 becomes 1, codes 2-5 become 0, codes 6 and 7 become missing.
cap drop best_mip*
local w = 0
foreach src in A042 B013 C014 {
	local ++w
	recode `src' (2 3 4 5 = 0 "PDL gov not best at MIP") (1 = 1 "PDL gov best at MIP") (6 7 = .), gen(best_mip_w`w')
}


*** Best at handling immigration problem [PDL government is best]
* One dummy per wave (waves 1-3) from A045_01, B016_01 and C017_01:
* code 2 becomes 1, codes 1 3 4 5 become 0, codes 6 and 7 become missing.
* The value labels name the immigration problem, which is what these
* variables measure, rather than the most important problem (MIP).
* NOTE(review): the PDL government is code 2 here but code 1 in the
* most-important-problem items - confirm against the codebook.
cap drop best_immig*
local w = 0
foreach src in A045_01 B016_01 C017_01 {
	local ++w
	recode `src' (1 3 4 5 = 0 "PDL gov not best at immigration") (2 = 1 "PDL gov best at immigration") (6 7 = .), gen(best_immig_w`w')
}


*** Leader's evaluation: Berlusconi
* One variable per wave, copied from the wave-specific item.
* NOTE(review): wave 4 reads item D025_05 while all other waves read an item
* ending in _04 - confirm that this is the Berlusconi item in wave 4.
cap drop berl*
local w = 0
foreach src in A070_04 B025_04 C023_04 D025_05 E019_04 {
	local ++w
	gen berl_w`w' = `src'
}

* Codes 12, 13 and 14 become missing, then everything is shifted down by one.
foreach v of varlist berl_* {
	recode `v' (12 13 14 = .)
	replace `v' = `v' - 1
}


*** Party Identification for PD and PDL [strength component] ***

* Categories in the directional component are changed to fit in the PTVs

* Strength component of Party Id., one variable per wave.
gen pid_s1 = A054
gen pid_s2 = B022
gen pid_s3 = C020
gen pid_s4 = D022
gen pid_s5 = E016

* Codes 4 and 5 of the strength item are recoded depending on the DIRECTION
* item of the same wave (A052, B020, C018, D020, E014):
*   - direction is a party (not 24, 25, 26 and not missing): 4 5 become 1
*   - direction is 24, 25, 26 or missing:                    4 5 become missing
* The two conditions must be mutually exclusive. A test of the form
* (x != 24 | x != 25 | x != 26) is true for every value of x, so it cannot be
* used to select the party identifiers; !inlist() is used instead.
* Wave 4 is conditioned on the direction item D020 (the item also used for the
* wave-4 PD/PDL identification), not on the strength item D022 itself.
local pid_dir A052 B020 C018 D020 E014
forvalues w = 1/5 {
	local dir : word `w' of `pid_dir'
	recode pid_s`w' (4 5 = 1) if !inlist(`dir', 24, 25, 26) & !missing(`dir')
	recode pid_s`w' (4 5 = .) if inlist(`dir', 24, 25, 26) | missing(`dir')
}

* Party identification with PD and with PDL: the variable is 0 unless the
* direction item of the wave names one of the listed party codes, in which
* case it takes the strength of identification (pid_s1 ... pid_s5).

* Waves 1-3: PD codes are 4 14 15 17, PDL codes are 6 20.
local pid_dir A052 B020 C018
forvalues w = 1/3 {
	local dir : word `w' of `pid_dir'
	gen pid_pd_w`w' = 0
	replace pid_pd_w`w' = pid_s`w' if inlist(`dir', 4, 14, 15, 17)
	gen pid_pdl_w`w' = 0
	replace pid_pdl_w`w' = pid_s`w' if inlist(`dir', 6, 20)
}

* Wave 4: code 23 is added to the PD codes.
gen pid_pd_w4 = 0
replace pid_pd_w4 = pid_s4 if inlist(D020, 4, 14, 15, 17, 23)
gen pid_pdl_w4 = 0
replace pid_pdl_w4 = pid_s4 if inlist(D020, 6, 20)

* Wave 5: PD codes are 4 15 23.
* NOTE(review): no PDL identification variable is built for wave 5 - confirm
* that this is intended.
gen pid_pd_w5 = 0
replace pid_pd_w5 = pid_s5 if inlist(E014, 4, 15, 23)

*** Directional PID (all main parties, time-invariant from W1)
* The wave-1 direction item A052 is collapsed into six groups; codes 25 and 26
* become missing.
recode A052 ///
	(24 = 0 "Independent") ///
	(1 2 3 5 13 = 1 "Partisan for a leftist party") ///
	(4 14 15 17 = 2 "Partisan for PD") ///
	(6 20 = 3 "Partisan for PDL") ///
	(9 11 19 21 = 4 "Partisan for rightist party") ///
	(7 8 10 12 16 18 22 23 = 5 "Other") ///
	(25 26 = .), gen(pid)


************************************************
***********  Other  variables       ************
************************************************

*** Interest in politics
* One variable per wave (waves 1-3) from A011, B003 and C003: the item is
* shifted down by one and the shifted values 4 and 5 are set to missing.
* Each variable label carries the number of its own wave.
local w = 0
foreach src in A011 B003 C003 {
	local ++w
	gen polint_w`w' = `src' - 1
	recode polint_w`w' (4 5 = .)
	label var polint_w`w' "Interest in Politics W`w' [0 not at all 3 much]"
}


*** Left-Right self-positioning
* One variable per wave from A079, B031, C029, D031 and E025: the item is
* shifted down by one and the shifted values 11 and 12 are set to missing.
local w = 0
foreach src in A079 B031 C029 D031 E025 {
	local ++w
	gen lr_w`w' = `src' - 1
	recode lr_w`w' (11 12 = .)
	label var lr_w`w' "Left-Right self-reported [0 left 10 right]"
}

*** LR distance from the parties' perceived positions
* The perceived-position items are rescaled IN PLACE (shifted down by one,
* then the shifted values 11 and 12 set to missing). Running this loop twice
* on the same data in memory would shift the values twice.
foreach v of varlist A080* B032* C030* D032* E026* {
	replace `v' = `v' - 1
	recode `v' (11 12 = .)
}

local lr_stub A080 B032 C030 D032 E026

*** LR distance from PDL's perceived position (items ending in _01)
forvalues w = 1/5 {
	local stub : word `w' of `lr_stub'
	gen lrdist_pdl_w`w' = abs(lr_w`w' - `stub'_01)
}

*** LR distance from PD's perceived position (items ending in _02)
forvalues w = 1/5 {
	local stub : word `w' of `lr_stub'
	gen lrdist_pd_w`w' = abs(lr_w`w' - `stub'_02)
}

*** Retrospective sociotropic economic assessment
* Five-point version, one variable per wave from A012, B004, C004, D004 and
* E004: the item is shifted down by one and the shifted values 5 and 6 are
* set to missing.
local w = 0
foreach src in A012 B004 C004 D004 E004 {
	local ++w
	gen retsoc_w`w' = `src' - 1
	recode retsoc_w`w' (5 6 = .)
	label var retsoc_w`w' "Retro Sociot. economy [0 much worse 4 much better]"
}

* Three-point version: 0 and 1 become -1, 2 becomes 0, 3 and 4 become 1;
* anything else stays missing.
forvalues w = 1/5 {
	gen retsoc01_w`w' = .
	replace retsoc01_w`w' = -1 if inlist(retsoc_w`w', 0, 1)
	replace retsoc01_w`w' = 0 if retsoc_w`w' == 2
	replace retsoc01_w`w' = 1 if inlist(retsoc_w`w', 3, 4)
	label var retsoc01_w`w' "Retro Sociot. economy [-1 worse; 0 same; 1 better]"
}


*** Retrospective evaluation of the government

* Wave 1 pools the two split-half samples: A015 (1-4 scale) is mapped to
* 10 7 3 0 with codes 5 and 6 set to missing; respondents who are still
* missing take A016 (0-10 scale) shifted down by one. Shifted values 11 and 12
* are then set to missing.
gen gov_w1 = A015
recode gov_w1 (1 = 10) (2 = 7) (3 = 3) (4 = 0) (5 6 = .)
replace gov_w1 = A016 - 1 if gov_w1 == .
recode gov_w1 (11 12 = .)

* Waves 2-5: the item is shifted down by one, shifted values 11 and 12 are
* set to missing.
local w = 1
foreach src in B006 C006 D010 E006 {
	local ++w
	gen gov_w`w' = `src' - 1
	recode gov_w`w' (11 12 = .)
}

forvalues w = 1/5 {
	label var gov_w`w' "Government approval Wave `w' [0-10]"
}


*** Vote intention for PDL
* 0/1 dummy per wave: 1 when the vote item equals the PDL code, 0 in every
* other case (a missing source value also gives 0).
* Waves 1-4 use code 6; wave 5 uses code 8 of E081.
gen vintent_w1 = (A105 == 6)
gen vintent_w2 = (B042 == 6)
gen vintent_w3 = (C035 == 6)
gen vintent_w4 = (D040 == 6)
gen vintent_w5 = (E081 == 8)

****************************
*** VARIABLES' CHECK********
* Print summary statistics for every variable from id to vintent_w5.
foreach v of varlist id-vintent_w5 {
	summarize `v'
}

* Keep only the variables from id to vintent_w5 (in dataset order) and sort
* the data on id.
keep id-vintent_w5
sort id

* Survey-mode marker: every row of this file is CAWI (1).
gen dataset = 1
label define dataset 0 "CATI" 1 "CAWI"
label values dataset dataset

save "${recoded}dataset_idlevel_CAWI.dta", replace


* Append the recoded CATI file to the recoded CAWI file and save the result.
* This requires running "dataset_preparation_CATI.do"
* The variable cawi_cati created by append marks the source of each row.
use "${recoded}dataset_idlevel_CAWI.dta", clear
count    /* N= 1497 CAWI */

append using "${recoded}dataset_idlevel_CATI.dta", generate(cawi_cati)
count    /* N= 6911 CAWI + CATI */

save "${recoded}merged_dataset_ID.dta", replace
clear
